* ----------------------------------------------------------------------
* Exports PATSTAT application and IPC mappings as CSV files.
* Positional argument 1 is stored as the project root directory;
* positional arguments 2 to 5 are optional overrides parsed further down.
* ----------------------------------------------------------------------
global root_dir = "`1'"

* Project configuration. Presumably defines the directory globals used later
* in this file (log_dir, dataset_dir, commondata_dir, classification_dir);
* that file is not visible here, so confirm against it.
include "$root_dir/code/config/config.do"


* Open a named log. The path is quoted so that a log directory containing
* spaces does not split the filename. cap noi prints a failure but lets the
* script continue.
cap noi log using "${log_dir}/appln_ipc.log", replace name(dat)

* Optional overrides arrive as positional arguments 2 to 5. The placeholder
* ___EMPTY___ stands for an omitted value and is mapped to an empty string.
* Results are stored in the globals arg1 to arg4.
forvalues slot = 1/4 {
    local position = `slot' + 1
    global arg`slot' "``position''"
    if "${arg`slot'}" == "___EMPTY___" {
        global arg`slot' ""
    }
}

* A non-empty override replaces the matching global; an empty one leaves that
* global untouched, so any value set earlier stays in effect.
if !missing("${arg1}") {
    global weight_category "${arg1}"
    display "Weight category: ${weight_category}"
}

if !missing("${arg2}") {
    global weight_versions "${arg2}"
    display "Weight versions: ${weight_versions}"
}

if !missing("${arg3}") {
    global weight_window "${arg3}"
    display "Weight window: ${weight_window}"
}

if !missing("${arg4}") {
    global wtype "${arg4}"
}
* The weight type is always echoed, whether or not it was overridden.
display "${wtype}"
* All exports run inside one capture block: noi still prints any error, and
* the return code is inspected right after the block closes.
* Every file path below is quoted so that directories containing spaces work.
capture noi {

* Input feature table and output directory for the CSV files written below.
global pat_list_file "${classification_dir}/V6/appln_features.csv"
global out_dir "${root_dir}/classification/patstat"


********************************************************
*** 1) Create a mapping from family id to IPC codes
*** this will be used in the final classification stage of the python program
********************************************************
use "${dataset_dir}/patstat_orbis/docdb_family_id_cipc_codes.dta", clear
sort docdb_family_id
export delimited using "${out_dir}/docdb_family_id_cipc_codes.csv", replace

********************************************************
*** 2) For the classification program, create a mapping from patents to cipc codes
*** this will be used for computing the automation keyword shares per ipc code
*** this has to be on the patent level
********************************************************
* Read the feature table and keep only the application numbers.
* stringcols(1) reads the first column as text; presumably that column is
* appln_nr, confirm against the CSV layout.
import delimited using "${pat_list_file}", stringcols(1) clear
keep appln_nr
compress

* Now merge with appln_info to get appln_id
* (for performance reasons we merge from appln_info)
* The application numbers are parked in a tempfile that is reused in step 3.
tempfile features
save "`features'"

use "${commondata_dir}/patstat_2018b/appln_info.dta", clear
keep if appln_auth == "EP"
keep appln_id appln_nr

* unmatched(none) keeps only observations present on both sides of the merge.
mmerge appln_nr using "`features'", unmatched(none)
mmerge appln_id using "${dataset_dir}/patstat_orbis/cipc_codes.dta", unmatched(none)
ren cipc6 ipc6
keep appln_nr ipc6

* Six-character level: only codes longer than four characters are written.
export delimited using "${out_dir}/appln_ipc6.csv" if strlen(ipc6)>4, replace
* Four-character level: truncate the code and collapse the resulting duplicates.
gen ipc4 = substr(ipc6,1,4)
drop ipc6
duplicates drop
export delimited using "${out_dir}/appln_ipc4.csv", replace

********************************************************
*** 3) For the classification program, create a mapping of appln_nr to appln_year
*** this is used by the python script to restrict patents considered on a
*** specific relevant time frame
********************************************************

use "${commondata_dir}/patstat_2018b/appln_info.dta", clear
keep if appln_auth == "EP"
keep appln_id appln_nr appln_year

* Restrict to the applications listed in the feature table.
mmerge appln_nr using "`features'", unmatched(none)
keep appln_nr appln_year
export delimited using "${out_dir}/appln_year.csv", replace



}
* Report the outcome of the capture block above: _rc is zero only when no
* command inside it raised an error.
display cond(_rc == 0, "Execution finished successfully.", "Execution finished with errors.")

* Close the named log; cap suppresses any error from the close, for example
* when the log was never opened.
cap log close dat